# County-level merge and binomial regressions of search counts on attitudes.
# NOTE(review): this run of statements appears again, verbatim, later in this
# file after s_agg, denom, fips, reduce and c_tot are built and after the
# packages that provide cluster.vcov()/coeftest() are attached. At this
# position none of those are defined in the visible file, so this copy looks
# like leftover session history -- confirm before relying on it.

# Outer-join search counts with search denominators by county key.
full <- merge(s_agg, denom, by=c("county"), all=T)
# Build the "state,county" key on the FIPS crosswalk and attach survey predictions.
fips$county <- tolower(paste0(fips$state,",",fips$name))
fips <- merge(fips, reduce, by.x="fips", by.y="DemoGroup1")
full0 <- merge(full, fips, by="county")
# Both inputs carry a `county` column, so this merge yields county.x / county.y.
full2 <- merge(full0, c_tot, by.x="fips", by.y="Fips")
#drop county obs with no denom obs?
full2 <- full2[!is.na(full2$denom),]
# Counties with a denominator but no matching searches get a count of zero.
full2$crime[is.na(full2$crime)]<- 0
full2$welfare[is.na(full2$welfare)]<- 0
full2$hsi[is.na(full2$hsi)]<- 0
full2$report[is.na(full2$report)]<- 0
# Search counts as a share of the county denominator.
full2$crime_prop <- full2$crime/full2$denom
full2$welfare_prop <- full2$welfare/full2$denom
full2$report_prop <- full2$report/full2$denom
full2$hsi_prop <- full2$hsi/full2$denom
# NOTE(review): `$p` needs a column named exactly `p`, or a unique column whose
# name starts with "p" (data.frame `$` partial matching) -- confirm which.
full2$oppose_immigration <- -1*full2$p
# Each model below: binomial GLM on (successes, failures) = (count, denom - count),
# followed by a coefficient test using a county-clustered covariance matrix.
#regc <- (glm(cbind(full2$crime, full2$denom-full2$crime)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born + oppose_immigration:foreign_born, data=full2, family="binomial"))
regc <- (glm(cbind(full2$crime, full2$denom-full2$crime)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regc, factor(full2$county.x))
cc <- coeftest(regc, vcov = vcov)
#regw <- (glm(cbind(full2$welfare, full2$denom-full2$welfare)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born + oppose_immigration:foreign_born, data=full2, family="binomial"))
regw <- (glm(cbind(full2$welfare, full2$denom-full2$welfare)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regw, factor(full2$county.x))
cw <- coeftest(regw, vcov = vcov)
#regr <- (glm(cbind(full2$report, full2$denom-full2$report)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born + oppose_immigration:foreign_born, data=full2, family="binomial"))
regr <- (glm(cbind(full2$report, full2$denom-full2$report)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regr, factor(full2$county.x))
cr <- coeftest(regr, vcov = vcov)
#regh <- (glm(cbind(full2$hsi, full2$denom-full2$hsi)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born + oppose_immigration:foreign_born, data=full2, family="binomial"))
regh <- (glm(cbind(full2$hsi, full2$denom-full2$hsi)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regh, factor(full2$county.x))
ch <- coeftest(regh, vcov = vcov)
# Same four models with vote share (1 - dpct) in place of oppose_immigration.
# regc/regw/regr/regh are overwritten; results are kept in cct/cwt/crt/cht.
full2$trump_vote <- 1-full2$dpct
#regc <- (glm(cbind(full2$crime, full2$denom-full2$crime)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born + trump_vote:foreign_born, data=full2, family="binomial"))
regc <- (glm(cbind(full2$crime, full2$denom-full2$crime)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regc, factor(full2$county.x))
cct <- coeftest(regc, vcov = vcov)
#regw <- (glm(cbind(full2$welfare, full2$denom-full2$welfare)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born + trump_vote:foreign_born, data=full2, family="binomial"))
regw <- (glm(cbind(full2$welfare, full2$denom-full2$welfare)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regw, factor(full2$county.x))
cwt <- coeftest(regw, vcov = vcov)
#regr <- (glm(cbind(full2$report, full2$denom-full2$report)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born + trump_vote:foreign_born, data=full2, family="binomial"))
regr <- (glm(cbind(full2$report, full2$denom-full2$report)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regr, factor(full2$county.x))
crt <- coeftest(regr, vcov = vcov)
#regh <- (glm(cbind(full2$hsi, full2$denom-full2$hsi)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born + trump_vote:foreign_born, data=full2, family="binomial"))
regh <- (glm(cbind(full2$hsi, full2$denom-full2$hsi)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regh, factor(full2$county.x))
cht <- coeftest(regh, vcov = vcov)
library(stargazer)
# Text table of the crime, welfare and report models (hsi models are not included here).
stargazer(cc, cct, cw, cwt,cr, crt, single.row=T, type="text")
library(data.table)
library(stringr)
library(sp)
library(maps)
library(maptools)
library(lmtest)
library(multiwayvcov)
library(naniar)
library(stm)
library(beepr)
# Map longitude/latitude points to county polygon names.
#
# pointsDF: two-column data frame of coordinates; callers in this script pass
#   the columns in (lon, lat) order. Coordinates are treated as WGS84 long/lat.
# Returns: character vector with one element per row of pointsDF, holding the
#   name of the county polygon containing the point. Names come from
#   maps::map('county') with any ":"-suffix removed, i.e. one ID per county
#   even when a county is drawn as several polygons.
#   Points that fall in no polygon presumably come back as NA (callers drop
#   them with na.omit) -- confirm against sp::over.
latlong2county <- function(pointsDF) {
  wgs84 <- CRS("+proj=longlat +datum=WGS84")

  # One SpatialPolygons entry per county; multi-part counties share an ID
  # because everything after ":" in the polygon name is discarded.
  counties <- map('county', fill=TRUE, col="transparent", plot=FALSE)
  county_ids <- vapply(strsplit(counties$names, ":"),
                       function(parts) parts[1], character(1))
  counties_sp <- map2SpatialPolygons(counties, IDs=county_ids,
                                     proj4string=wgs84)

  # Convert the input coordinates to a SpatialPoints object in the same CRS.
  points_sp <- SpatialPoints(pointsDF, proj4string=wgs84)

  # 'over' gives, per point, the index of the polygon that contains it.
  hit <- over(points_sp, counties_sp)

  # Translate polygon indices back to county IDs.
  county_names <- vapply(counties_sp@polygons,
                         function(poly) poly@ID, character(1))
  county_names[hit]
}
# Normalise county-name spellings by collapsing known multi-word or
# punctuated variants (e.g. "de kalb" -> "dekalb") so keys from different
# sources can match. Works on a character vector; every occurrence of each
# pattern is replaced, in the order listed.
county_clean <- function(x) {
  # pattern = replacement, applied top to bottom
  fixes <- c(
    "de kalb"        = "dekalb",
    "du page"        = "dupage",
    "la salle"       = "lasalle",
    "la porte"       = "laporte",
    "baltimore city" = "baltimore",
    "de soto"        = "desoto",
    "de witt"        = "dewitt",
    "o'brien"        = "obrien",
    "la moure"       = "lamoure"
  )
  for (pattern in names(fixes)) {
    x <- gsub(pattern, fixes[[pattern]], x)
  }
  x
}
# Second spelling pass used on the FIPS crosswalk names: joins "la grange"
# into "lagrange" and "o brien" into "obrien" in a character vector.
county_clean2 <- function(x) {
  joined_lagrange <- gsub("la grange", "lagrange", x)
  gsub("o brien", "obrien", joined_lagrange)
}
### read in survey predictions and FIPS crosswalk ###
# NOTE(review): the working directory is a machine-specific absolute path;
# it has to be adjusted to run this anywhere else.
setwd("C:\\Users\\b-makrup.NORTHAMERICA\\Documents\\old_docs")
reduce <- read.csv("ReduceLegalImmigration_predictions_pw.csv")
# This file used to be read twice in a row into the same object; one read
# gives the same result.
border <- read.csv("BorderSecurity_predictions_pw.csv")
fips <- read.csv("fips_county_cross.csv")
# Replace state abbreviations with lower-case full state names via the
# built-in state.abb / state.name vectors (unmatched abbreviations become NA).
fips$state <- tolower(state.name[match(fips$state,state.abb)])
# Lower-case county names and apply the crosswalk spelling fixes.
fips$name <- county_clean2(tolower(fips$name))
# NOTE(review): network-share path; machine-specific.
setwd("//msrnyc-00/Root/TVSearch/Masha/Racism/final_analysis/sec3/geog")
### clean demographics ###
# Builds c_tot: one row per county with Fips, demographic shares, Democratic
# two-party vote share (dpct) and a lower-case "state,county" key.
c1 <- read.csv("nhgis0033_ds172_2010_county.csv")
# urban = H7W002 / H7W001 (presumably urban over total population -- confirm
# against the NHGIS codebook).
c1$urban <- c1$H7W002/c1$H7W001
p16 <- read.csv("presvote16.csv")
# Democratic share of the two-party (Democrat + Republican) 2016 vote.
p16$dpct <- p16$Democrats.2016/(p16$Democrats.2016+p16$Republicans.2016)
# County code is zero-padded to 3 characters, then appended to the state code.
# NOTE(review): STATEA is not padded here, so the resulting Fips has no leading
# zero if STATEA was read as a number -- confirm it matches p16$Fips.
c1$COUNTYA <- str_pad(c1$COUNTYA,3,pad="0",side="left")
c1$Fips <- paste0(c1$STATEA, c1$COUNTYA)
p16 <- p16[,c("Fips","dpct")]
# Inner join: counties without a vote record are dropped.
c1<- merge(c1, p16, by="Fips")
c1 <- c1[,c("Fips","STATE","COUNTY","urban","dpct")]
c2 <- read.csv("nhgis0033_ds233_20175_2017_county.csv")
# Shares computed from NHGIS table cells; the cell meanings below are inferred
# from the variable names assigned here -- confirm against the codebook.
c2$black<- c2$AHY2E003/c2$AHY2E001
c2$hisp <- c2$AHZAE012/c2$AHZAE001
c2$bacc <- (c2$AH04E022+c2$AH04E023+c2$AH04E024+c2$AH04E025)/c2$AH04E001
c2$medianhhinc <- c2$AH1PE001
c2 <- c2[,c("STATE","COUNTY","black","hisp","bacc","medianhhinc")]
c3 <- read.csv("nhgis0033_ds234_20175_2017_county.csv")
# foreign_born = (total - (cell 002 + cell 003)) / total.
c3$foreign_born <- (c3$AH8VE001-(c3$AH8VE002+c3$AH8VE003))/c3$AH8VE001
c3 <- c3[,c("STATE","COUNTY","foreign_born")]
# No `by` given: merge joins on the shared columns (STATE and COUNTY).
c_tot <- merge(c1,c2)
c_tot <- merge(c_tot, c3)
# Strip " County", " Parish" and periods, then lower-case, to build the key.
c_tot$COUNTY<- tolower(gsub(" County|\\.| Parish","", c_tot$COUNTY))
c_tot$STATE <- tolower(c_tot$STATE)
c_tot$county <- paste0(c_tot$STATE,",",c_tot$COUNTY)
c_tot <- c_tot[,c("Fips","urban","black","hisp","bacc","medianhhinc","foreign_born","dpct","county")]
# Drop the apostrophe in possessives ("'s" -> "s") in the key.
c_tot$county <- gsub("'s","s",c_tot$county)
### read in search ###
#get daily searches per county
### CRIME ###
#terms:
#Intersect (crime|criminal|murder|kill) & (immigr|illegals|illegal alien)
# Result: c_agg, a table of search counts per county (columns Var1, Freq until renamed).
setwd("~/")
# Headerless file; quote="" disables quote handling so quotes in queries are kept as text.
bg_crime <- fread("geog_crime_raw.txt", data.table=F, header=F, quote="")
colnames(bg_crime) <- c("query","date","lat","lon")
#clean up and remove unrelated searches
#remove terms that are unrelated or botted
# invert=T keeps only rows whose query does NOT match any listed phrase (case-insensitive).
bg_crime <- bg_crime[grep("deer were killed by predators|sweden immigrants crime|ICE immigrant-crime hotline calls|criminal justice reform|theodore roosevelt on race, riots, immigration, and crime book",bg_crime$query,ignore.case=T, invert=T),]
bg_crime$county <- latlong2county(bg_crime[,c("lon","lat")])
bg_crime$date <- as.Date(bg_crime$date)
# Keep dates on/after 2016-03-01 ...
tmp <- bg_crime[bg_crime$date>=as.Date("2016-03-01"),]
# ... and exclude 2018-12-01 through 2018-12-31.
# NOTE(review): unlike the denominator files, no upper bound of 2019-07-31 is
# applied here -- confirm the raw file does not extend past that date.
tmp <- tmp[tmp$date>as.Date("2018-12-31")|tmp$date<as.Date("2018-12-01"),]
# table() ignores NA counties, so unmatched points are not counted.
c_agg <- as.data.frame(table(tmp$county))
### WELFARE ###
#terms:
#Intersect (welfare|cost|benefits) & (immigr|illegals|illegal alien)
# Result: w_agg, a table of search counts per county.
bg_welfare <- fread("geog_welfare_raw.txt", data.table=F, header=F, quote="")
colnames(bg_welfare) <- c("query","date","lat","lon")
#clean up and remove unrelated searches
#remove terms that are unrelated or botted
# invert=T keeps only rows whose query does NOT match any listed phrase (case-insensitive).
bg_welfare <- bg_welfare[grep("low cost immigration law services|immigration benefits|costa rica immigration|immigrant costumes|benefits of illegal immigration|process and cost for immigrants coming to usa|benefits of immigrants|low cost immigration law services orange county ca|benefits of immigration|California immigrants health benefits",bg_welfare$query,ignore.case=T, invert=T),]
bg_welfare$county <- latlong2county(bg_welfare[,c("lon","lat")])
bg_welfare$date <- as.Date(bg_welfare$date)
# Keep dates on/after 2016-03-01, excluding 2018-12-01 through 2018-12-31.
tmp <- bg_welfare[bg_welfare$date>=as.Date("2016-03-01"),]
tmp <- tmp[tmp$date>as.Date("2018-12-31")|tmp$date<as.Date("2018-12-01"),]
w_agg <- as.data.frame(table(tmp$county))
### REPORT ###
#terms:
#Intersect (report) & (immigr|illegals|illegal alien|to ice)
# Result: r_agg, a table of search counts per county.
bg_report <- fread("geog_report_raw.txt", data.table=F, header=F, quote="")
# This file has seven columns; v1, v4 and v7 are not used below.
colnames(bg_report) <- c("v1","query","date","v4","lat","lon","v7")
#clean up and remove unrelated searches
#remove terms that are unrelated
# invert=T keeps only rows whose query does NOT match any listed phrase (case-insensitive).
bg_report <- bg_report[grep("immigration commission report|: Report|us news and world report|crime report|the parents report|summarizing a report|IG Report|MSNBC Report|criminal illegal alien report|Report On|sample report|thai immigration 90 day report|report from the Government Accounting Office|intelligence report|responds to immigration report|center for immigration studies report",bg_report$query,ignore.case=T, invert=T),]
bg_report$county <- latlong2county(bg_report[,c("lon","lat")])
bg_report$date <- as.Date(bg_report$date)
# Keep dates on/after 2016-03-01, excluding 2018-12-01 through 2018-12-31.
tmp <- bg_report[bg_report$date>=as.Date("2016-03-01"),]
tmp <- tmp[tmp$date>as.Date("2018-12-31")|tmp$date<as.Date("2018-12-01"),]
r_agg <- as.data.frame(table(tmp$county))
### HSI ###
#terms:
#searches that led to at least one click to following url:
#https://www.ice.gov/webform/hsi-tip-form
# Result: h_agg, a table of search counts per county.
bg_hsi <- fread("geog_hsi_raw.txt", data.table=F, header=F, quote="")
# Column order differs from the crime/welfare files: date comes before query.
colnames(bg_hsi) <- c("date","query","lat","lon")
# Rows with any missing field are dropped before the county lookup.
bg_hsi <- na.omit(bg_hsi)
bg_hsi$county <- latlong2county(bg_hsi[,c("lon","lat")])
bg_hsi$date <- as.Date(bg_hsi$date)
# Keep dates on/after 2016-03-01, excluding 2018-12-01 through 2018-12-31.
tmp <- bg_hsi[bg_hsi$date>=as.Date("2016-03-01"),]
tmp <- tmp[tmp$date>as.Date("2018-12-31")|tmp$date<as.Date("2018-12-01"),]
h_agg <- as.data.frame(table(tmp$county))
### Denominators ###
# Builds `denom`: total of column n per county, summed over five input files
# and restricted to the same date window as the search counts.
d1 <- fread("county_denoms_1.txt")
d2 <- fread("county_denoms_2.txt")
d3 <- fread("county_denoms_3.txt")
d4 <- fread("county_denoms_4.txt")
d5 <- fread("county_denoms_5.txt")
# File 5 is reordered to V2,V3,V4,V1 so the shared column names below line up
# (presumably its count column comes first -- confirm against the raw file).
d5 <- d5[,c("V2","V3","V4","V1")]
colnames(d1)<- c("date","lat","lon","n")
d1$county <- latlong2county(d1[,c("lon","lat")])
colnames(d2)<- c("date","lat","lon","n")
d2$county <- latlong2county(d2[,c("lon","lat")])
colnames(d3)<- c("date","lat","lon","n")
d3$county <- latlong2county(d3[,c("lon","lat")])
colnames(d4)<- c("date","lat","lon","n")
d4$county <- latlong2county(d4[,c("lon","lat")])
colnames(d5)<- c("date","lat","lon","n")
# Only file 5 has incomplete rows removed before the county lookup.
d5 <- na.omit(d5)
d5$county <- latlong2county(d5[,c("lon","lat")])
# Keep date/county/n and drop rows with a missing value in any of them
# (this removes points that were not assigned a county).
d1 <- na.omit(d1[,c("date","county","n")])
d2 <- na.omit(d2[,c("date","county","n")])
d3 <- na.omit(d3[,c("date","county","n")])
d4 <- na.omit(d4[,c("date","county","n")])
d5 <- na.omit(d5[,c("date","county","n")])
d1$date <- as.Date(d1$date)
d2$date <- as.Date(d2$date)
d3$date <- as.Date(d3$date)
d4$date <- as.Date(d4$date)
d5$date <- as.Date(d5$date)
#fixing dates to make sure everything overlaps
# Window: 2016-03-01 .. 2019-07-31 inclusive, excluding 2018-12-01 .. 2018-12-31.
d1 <- d1[d1$date>=as.Date("2016-03-01"),]
d1 <- d1[d1$date<=as.Date("2019-07-31"),]
d1 <- d1[d1$date>as.Date("2018-12-31")|d1$date<as.Date("2018-12-01"),]
d2 <- d2[d2$date>=as.Date("2016-03-01"),]
d2 <- d2[d2$date<=as.Date("2019-07-31"),]
d2 <- d2[d2$date>as.Date("2018-12-31")|d2$date<as.Date("2018-12-01"),]
d3 <- d3[d3$date>=as.Date("2016-03-01"),]
d3 <- d3[d3$date<=as.Date("2019-07-31"),]
d3 <- d3[d3$date>as.Date("2018-12-31")|d3$date<as.Date("2018-12-01"),]
d4 <- d4[d4$date>=as.Date("2016-03-01"),]
d4 <- d4[d4$date<=as.Date("2019-07-31"),]
d4 <- d4[d4$date>as.Date("2018-12-31")|d4$date<as.Date("2018-12-01"),]
d5 <- d5[d5$date>=as.Date("2016-03-01"),]
d5 <- d5[d5$date<=as.Date("2019-07-31"),]
d5 <- d5[d5$date>as.Date("2018-12-31")|d5$date<as.Date("2018-12-01"),]
# Per-file county totals.
agg1 <- aggregate(d1$n, list(d1$county), sum)
agg2 <- aggregate(d2$n, list(d2$county), sum)
agg3 <- aggregate(d3$n, list(d3$county), sum)
agg4 <- aggregate(d4$n, list(d4$county), sum)
agg5 <- aggregate(d5$n, list(d5$county), sum)
# Stack the per-file totals, free the raw tables, then sum again so a county
# appearing in several files ends up with a single row.
denom <- rbind(agg1,agg2,agg3,agg4,agg5)
rm(d1,d2,d3,d4,d5)
colnames(denom) <- c("county","denom")
denom <- aggregate(denom$denom, list(denom$county), sum)
colnames(denom) <- c("county","denom")
# Assemble full2: one row per county with search counts, denominator, survey
# predictions and demographics.
colnames(c_agg) <- c("county","crime")
colnames(w_agg) <- c("county","welfare")
colnames(r_agg) <- c("county","report")
colnames(h_agg) <- c("county","hsi")
# Outer joins throughout, so a county present in any one table is kept.
s_agg <- merge(h_agg,merge(r_agg,merge(c_agg, w_agg, by=c("county"),all=T), by=c("county"),all=T), by=c("county"),all=T)
full <- merge(s_agg, denom, by=c("county"), all=T)
# "state,county" key on the crosswalk, matching the key format used above.
fips$county <- tolower(paste0(fips$state,",",fips$name))
fips <- merge(fips, reduce, by.x="fips", by.y="DemoGroup1")
# Inner joins from here on: counties without a crosswalk/survey/demographic match are dropped.
full0 <- merge(full, fips, by="county")
# Both inputs carry a `county` column, so this merge yields county.x / county.y.
full2 <- merge(full0, c_tot, by.x="fips", by.y="Fips")
#drop county obs with no denom obs?
full2 <- full2[!is.na(full2$denom),]
# Counties with a denominator but no matching searches get a count of zero.
full2$crime[is.na(full2$crime)]<- 0
full2$welfare[is.na(full2$welfare)]<- 0
full2$hsi[is.na(full2$hsi)]<- 0
full2$report[is.na(full2$report)]<- 0
# Search counts as a share of the county denominator.
full2$crime_prop <- full2$crime/full2$denom
full2$welfare_prop <- full2$welfare/full2$denom
full2$report_prop <- full2$report/full2$denom
full2$hsi_prop <- full2$hsi/full2$denom
# NOTE(review): `$p` needs a column named exactly `p`, or a unique column whose
# name starts with "p" (data.frame `$` partial matching) -- confirm which.
full2$oppose_immigration <- -1*full2$p
# County-level models. Each one is a binomial GLM on
# (successes, failures) = (search count, denom - search count), followed by a
# coefficient test that uses a covariance matrix clustered on county.x.
# The commented-out variants add an interaction with foreign_born.
# NOTE(review): the object named `vcov` shares its name with stats::vcov();
# function calls still resolve to the function, but the name is easy to misread.
#regc <- (glm(cbind(full2$crime, full2$denom-full2$crime)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born + oppose_immigration:foreign_born, data=full2, family="binomial"))
regc <- (glm(cbind(full2$crime, full2$denom-full2$crime)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regc, factor(full2$county.x))
cc <- coeftest(regc, vcov = vcov)
#regw <- (glm(cbind(full2$welfare, full2$denom-full2$welfare)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born + oppose_immigration:foreign_born, data=full2, family="binomial"))
regw <- (glm(cbind(full2$welfare, full2$denom-full2$welfare)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regw, factor(full2$county.x))
cw <- coeftest(regw, vcov = vcov)
#regr <- (glm(cbind(full2$report, full2$denom-full2$report)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born + oppose_immigration:foreign_born, data=full2, family="binomial"))
regr <- (glm(cbind(full2$report, full2$denom-full2$report)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regr, factor(full2$county.x))
cr <- coeftest(regr, vcov = vcov)
#regh <- (glm(cbind(full2$hsi, full2$denom-full2$hsi)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born + oppose_immigration:foreign_born, data=full2, family="binomial"))
regh <- (glm(cbind(full2$hsi, full2$denom-full2$hsi)~ urban + black +medianhhinc + hisp + bacc+ oppose_immigration + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regh, factor(full2$county.x))
ch <- coeftest(regh, vcov = vcov)
# Same four models with vote share (1 - dpct) in place of oppose_immigration.
# regc/regw/regr/regh are overwritten; results are kept in cct/cwt/crt/cht.
full2$trump_vote <- 1-full2$dpct
#regc <- (glm(cbind(full2$crime, full2$denom-full2$crime)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born + trump_vote:foreign_born, data=full2, family="binomial"))
regc <- (glm(cbind(full2$crime, full2$denom-full2$crime)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regc, factor(full2$county.x))
cct <- coeftest(regc, vcov = vcov)
#regw <- (glm(cbind(full2$welfare, full2$denom-full2$welfare)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born + trump_vote:foreign_born, data=full2, family="binomial"))
regw <- (glm(cbind(full2$welfare, full2$denom-full2$welfare)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regw, factor(full2$county.x))
cwt <- coeftest(regw, vcov = vcov)
#regr <- (glm(cbind(full2$report, full2$denom-full2$report)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born + trump_vote:foreign_born, data=full2, family="binomial"))
regr <- (glm(cbind(full2$report, full2$denom-full2$report)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regr, factor(full2$county.x))
crt <- coeftest(regr, vcov = vcov)
#regh <- (glm(cbind(full2$hsi, full2$denom-full2$hsi)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born + trump_vote:foreign_born, data=full2, family="binomial"))
regh <- (glm(cbind(full2$hsi, full2$denom-full2$hsi)~ urban + black +medianhhinc + hisp + bacc+ trump_vote + foreign_born, data=full2, family="binomial"))
vcov <- cluster.vcov(regh, factor(full2$county.x))
cht <- coeftest(regh, vcov = vcov)
library(stargazer)
# Text table of the crime, welfare and report models; the hsi models (ch, cht)
# only appear in the commented-out call below.
stargazer(cc, cct, cw, cwt,cr, crt, single.row=T, type="text")
#stargazer(cr, crt, ch, cht, single.row=T, type="text")
# Project-root setup with the 'here' package, then read the three survey files.
# Install 'here' only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("here", quietly = TRUE)) {
  install.packages("here")
}
# Attach the package before the first here() call; previously here() was
# called one line before library(here), which fails in a fresh session.
library(here)
here()
# Declare this script's location relative to the project root.
i_am("GallupPlotReplication.R")
setwd(here("data"))
car <- read.csv("car_EconSurvey_release.csv")
house <- read.csv("house_EconSurvey_release.csv")
stock <- read.csv("stock_EconSurvey_release.csv")
library(lubridate)
library(ggplot2)
# 'here' is already attached above, so it is not attached a second time.
setwd(here("data"))
here()
library(rjson)
library(stringr)
library(ggplot2)
library(R.utils)
library(multiwayvcov)
library(lmtest)
library(stm)
library(stopwords)
library(data.table)
library(stargazer)
library(here)
#setwd(here("release_data"))
# NOTE(review): machine-specific absolute path; the commented-out line above
# is the project-relative alternative.
setwd("C:\\Users\\b-makrup.NORTHAMERICA\\Documents\\ImmigrationReplicationFull\\release_data")
# immigrFit, out and prep are not created anywhere in the visible code;
# presumably they are restored by this load() -- confirm.
load("TopicModel.RData")
### FIGURE 1 ###
# Small margins so the quote panels fill the device.
par(mar = c(1,1,1,1))
# One panel per topic (1, 3, 10, 13): the top-ranked document text for that
# topic (n=1), de-duplicated, drawn as a quote.
plotQuote(unique(findThoughts(immigrFit, texts=out$meta$txt, topics=c(1), n=1)$docs[[1]]), width=50,text.cex=1, main="")
plotQuote(unique(findThoughts(immigrFit, texts=out$meta$txt, topics=c(3), n=1)$docs[[1]]), width=50,text.cex=1, main="")
plotQuote(unique(findThoughts(immigrFit, texts=out$meta$txt, topics=c(10), n=1)$docs[[1]]), width=50,text.cex=1, main="")
plotQuote(unique(findThoughts(immigrFit, texts=out$meta$txt, topics=c(13), n=1)$docs[[1]]), width=50,text.cex=1, main="")
# Dummy lm with the same right-hand side; it only serves as a shell whose
# coefficients, SEs, t and p values are overridden in the stargazer() call below.
placeholder <- lm(1:nrow(out$meta) ~ time +channel +time:channel, data = out$meta)
#general crime
# First coefficient table stored in summary(prep).
summary(prep)[3]$tables[[1]]
# Columns 1-4 of that table are passed as coef, se, t and p respectively.
stargazer(placeholder,coef=list(summary(prep)[3]$tables[[1]][,1]), se=list(summary(prep)[3]$tables[[1]][,2]),t=list(summary(prep)[3]$tables[[1]][,3]),p=list(summary(prep)[3]$tables[[1]][,4]), single.row=T)
# Topic labels, printed for inspection.
labs <- labelTopics(immigrFit)
labs
#get topics per doc to get topic proportions/day
dt <- make.dt(immigrFit)
#define crime and welfare topics
# crime = Topic1 + Topic3; welfare = Topic13.
dt$crime <- dt$Topic1 + dt$Topic3
dt$welfare <- dt$Topic13
dt <- dt[,c("docnum","crime","welfare")]
# Column-bind by position: assumes dt rows are in the same order as out$meta -- TODO confirm.
meta <- out$meta
meta <- cbind(meta, dt)
#plot number of immigr segments per date
meta$date <- as.Date(meta$date)
#make sure to get any dates with zero segs
# One row per calendar day between the first and last date, for each of the
# three channels (fox, msnbc, cnn), stacked into unique_dates.
unique_dates <- data.frame(Date=seq(from=min(unique(meta$date)), to=max(unique(meta$date)), by=1))
unique_dates$channel <- "fox"
unique_dates2 <- data.frame(Date=seq(from=min(unique(meta$date)), to=max(unique(meta$date)), by=1))
unique_dates2$channel <- "msnbc"
unique_dates3 <- data.frame(Date=seq(from=min(unique(meta$date)), to=max(unique(meta$date)), by=1))
unique_dates3$channel <- "cnn"
unique_dates <- rbind(unique_dates, unique_dates2, unique_dates3)
# `length` counts rows per date/channel; the welfare column is only used as a
# vector to count over.
num_immigr <- aggregate(meta$welfare,list(meta$date, meta$channel), length)
colnames(num_immigr)<- c("Date","channel","n")
# Outer join with the full day grid, then fill days without segments with 0.
num_immigr <- merge(num_immigr, unique_dates, by=c("Date","channel"), all=T)
num_immigr$n[is.na(num_immigr$n)]<- 0
num_immigr$Date <- as.Date(num_immigr$Date)
# Month key: replace the day part of the date string with "01".
num_immigr$mo_yr <- gsub("-\\d\\d$","-01", num_immigr$Date)
# Period coding: "election" from 2015-06-16, "post-election" from 2017-01-20;
# 2017-01-20 itself ends up "post-election" because that assignment runs last.
num_immigr$post_election <- ifelse(num_immigr$Date>=as.Date("2017-01-20"),1,0)
num_immigr$post_trump <- ifelse(num_immigr$Date>=as.Date("2015-06-16")&num_immigr$Date<=as.Date("2017-01-20"),1,0)
num_immigr$time <- "pre-election"
num_immigr$time[num_immigr$post_trump==1] <- "election"
num_immigr$time[num_immigr$post_election==1] <- "post-election"
#aggregate by month
num_immigr <- aggregate(num_immigr$n,list(num_immigr$mo_yr,num_immigr$channel, num_immigr$time), sum)
colnames(num_immigr) <- c("Date","channel","time","n")
num_immigr$Date <- as.Date(num_immigr$Date)
# msnbc rows before 2015-01-01 get a missing Date, which removes them from the plot.
# NOTE(review): the crime/welfare coverage code later in this file blanks `n`
# instead of `Date` for the same rows; here `n` stays in the means printed on
# the next line -- confirm that is intended.
num_immigr$Date[num_immigr$Date<as.Date("2015-01-01")&num_immigr$channel=="msnbc"] <-NA
# Mean monthly segment count by period and channel, printed.
aggregate(num_immigr$n, list(num_immigr$time,num_immigr$channel), mean)
# Monthly counts per channel with a separate smoother per channel x period;
# dashed lines mark 2015-06-16 and 2017-01-20.
ggplot(num_immigr, aes(x=Date, y=n, color=channel, group=paste(channel, time)))+geom_point()+stat_smooth(se=F)+scale_color_manual(values=c("magenta","firebrick3","royalblue3"))+xlab(NULL)+ylab("Num Monthly\nImmigration Segs")+geom_vline(xintercept=as.Date("2015-06-16"), linetype="dashed")+geom_vline(xintercept=as.Date("2017-01-20"), linetype="dashed")+geom_label(x=as.Date("2014-08-01"), y=1500, label="Pre-Campaign", size=3,show.legend = FALSE, color="black")+geom_label(x=as.Date("2016-05-01"), y=1500, label="Campaign", size=3,show.legend = FALSE, color="black")+geom_label(x=as.Date("2018-06-01"), y=1500, label="Post-Inauguration", size=3,show.legend = FALSE, color="black")+theme(legend.position = "bottom",legend.title = element_blank(),axis.title=element_text(size=8))
#new coverage measure
# Coverage = sum of per-segment topic proportions, per channel and day, later
# summed to months.
meta$crime_dur <- meta$crime
meta$welf_dur <- meta$welfare
# NOTE(review): meta$trump is not created in the visible code; presumably it
# comes with out$meta -- confirm.
meta$trump_dur <- meta$trump
crime_dur <- aggregate(meta$crime_dur, list(meta$channel, meta$date), sum)
welf_dur <- aggregate(meta$welf_dur, list(meta$channel, meta$date), sum)
trump_dur <- aggregate(meta$trump_dur, list(meta$channel, meta$date), sum)
colnames(crime_dur)<- c("channel","date","crime_news")
colnames(welf_dur)<- c("channel","date","welfare_news")
colnames(trump_dur)<- c("channel","date","trump_news")
crime_dur$date <- as.Date(crime_dur$date)
welf_dur$date <- as.Date(welf_dur$date)
trump_dur$date <- as.Date(trump_dur$date)
# Outer join with the full day x channel grid; days without segments become 0.
crime_dur <- merge(crime_dur, unique_dates, by.x=c("date","channel"), by.y=c("Date","channel"), all=T)
welf_dur <- merge(welf_dur, unique_dates, by.x=c("date","channel"), by.y=c("Date","channel"), all=T)
trump_dur <- merge(trump_dur, unique_dates, by.x=c("date","channel"), by.y=c("Date","channel"), all=T)
crime_dur$crime_news[is.na(crime_dur$crime_news)]<- 0
welf_dur$welfare_news[is.na(welf_dur$welfare_news)]<- 0
trump_dur$trump_news[is.na(trump_dur$trump_news)]<- 0
# Period coding, identical for the three tables: "election" from 2015-06-16,
# "post-election" from 2017-01-20 (the later assignment wins on 2017-01-20).
crime_dur$post_election <- ifelse(crime_dur$date>=as.Date("2017-01-20"),1,0)
crime_dur$post_trump <- ifelse(crime_dur$date>=as.Date("2015-06-16")&crime_dur$date<=as.Date("2017-01-20"),1,0)
crime_dur$time <- "pre-election"
crime_dur$time[crime_dur$post_trump==1] <- "election"
crime_dur$time[crime_dur$post_election==1] <- "post-election"
welf_dur$post_election <- ifelse(welf_dur$date>=as.Date("2017-01-20"),1,0)
welf_dur$post_trump <- ifelse(welf_dur$date>=as.Date("2015-06-16")&welf_dur$date<=as.Date("2017-01-20"),1,0)
welf_dur$time <- "pre-election"
welf_dur$time[welf_dur$post_trump==1] <- "election"
welf_dur$time[welf_dur$post_election==1] <- "post-election"
trump_dur$post_election <- ifelse(trump_dur$date>=as.Date("2017-01-20"),1,0)
trump_dur$post_trump <- ifelse(trump_dur$date>=as.Date("2015-06-16")&trump_dur$date<=as.Date("2017-01-20"),1,0)
trump_dur$time <- "pre-election"
trump_dur$time[trump_dur$post_trump==1] <- "election"
trump_dur$time[trump_dur$post_election==1] <- "post-election"
# Month key: replace the day part of the date string with "01".
# trump_dur is not aggregated or plotted in the code below.
crime_dur$mo_yr <- gsub("-\\d\\d$","-01",crime_dur$date)
welf_dur$mo_yr <- gsub("-\\d\\d$","-01",welf_dur$date)
crime_agg <- aggregate(crime_dur$crime_news, list(crime_dur$mo_yr,crime_dur$channel,crime_dur$time), sum)
welf_agg <- aggregate(welf_dur$welfare_news, list(welf_dur$mo_yr,welf_dur$channel,welf_dur$time), sum)
colnames(crime_agg)<- c("Date","channel","time","n")
colnames(welf_agg)<- c("Date","channel","time","n")
crime_agg$Date <-as.Date(crime_agg$Date)
welf_agg$Date <-as.Date(welf_agg$Date)
# msnbc months before 2015-01-01 are set to missing so they are not plotted.
crime_agg$n[crime_agg$Date<as.Date("2015-01-01")&crime_agg$channel=="msnbc"]<- NA
welf_agg$n[welf_agg$Date<as.Date("2015-01-01")&welf_agg$channel=="msnbc"]<- NA
#plot crime & welfare duration by day
# Monthly totals with one smoother per channel x period; period labels sit at
# y=120 (crime) and y=45 (welfare).
ggplot(crime_agg, aes(x=Date, y=n, color=channel, group=paste(time,channel)))+geom_point()+stat_smooth(se=F)+scale_color_manual(values=c("magenta","firebrick3","royalblue3"))+xlab(NULL)+ylab("Immigr + Crime \nNews Coverage")+geom_vline(xintercept=as.Date("2015-06-16"), linetype="dashed")+geom_vline(xintercept=as.Date("2017-01-20"), linetype="dashed")+geom_label(x=as.Date("2014-08-01"), y=120, label="Pre-Campaign", size=3,show.legend = FALSE, color="black")+geom_label(x=as.Date("2016-05-01"), y=120, label="Campaign", size=3,show.legend = FALSE, color="black")+geom_label(x=as.Date("2018-06-01"), y=120, label="Post-Inauguration", size=3,show.legend = FALSE, color="black")+theme(legend.position = "bottom",legend.title = element_blank(),axis.title=element_text(size=8))
ggplot(welf_agg, aes(x=Date, y=n, color=channel, group=paste(time,channel)))+geom_point()+stat_smooth(se=F)+scale_color_manual(values=c("magenta","firebrick3","royalblue3"))+xlab(NULL)+ylab("Immigr Welfare \nNews Coverage")+geom_vline(xintercept=as.Date("2015-06-16"), linetype="dashed")+geom_vline(xintercept=as.Date("2017-01-20"), linetype="dashed")+geom_label(x=as.Date("2014-08-01"), y=45, label="Pre-Campaign", size=3,show.legend = FALSE, color="black")+geom_label(x=as.Date("2016-05-01"), y=45, label="Campaign", size=3,show.legend = FALSE, color="black")+geom_label(x=as.Date("2018-06-01"), y=45, label="Post-Inauguration", size=3,show.legend = FALSE, color="black")+theme(legend.position = "bottom",legend.title = element_blank(),axis.title=element_text(size=8))
# Combined view: smoothers only, faceted by topic type, one curve per channel
# across the whole date range (no period grouping here).
crime_agg$type <- "crime"
welf_agg$type <- "welfare"
tot_agg <- rbind(crime_agg, welf_agg)
ggplot(tot_agg, aes(x=Date, y=n, color=channel))+stat_smooth(se=F)+facet_wrap(~type)+scale_color_manual(values=c("magenta","firebrick3","royalblue3"))+xlab(NULL)+theme(legend.position = "bottom")
library(rjson)
library(stringr)
library(ggplot2)
library(R.utils)
library(multiwayvcov)
library(lmtest)
library(stm)
library(stopwords)
library(data.table)
library(stargazer)
library(here)
library(sentimentr)
library(lubridate)
# Emotion scoring of the segment text, aggregated to document x emotion type
# and then to channel x month for a first look at anger.
tmp <- out$meta$txt
# Remove the immigration terms themselves before scoring. The last alternative
# used to read "llegal alien", which left a stray "i" behind for every
# "illegal alien(s)"; it now removes the whole phrase.
# NOTE(review): this slightly changes the text passed to emotion() compared
# with earlier runs -- re-check any stored figures against the new output.
tmp <- gsub("immigr\\w*|illegal immigr\\w*|illegals|illegal alien\\w*","", tmp, ignore.case = TRUE)
emotions <- emotion(tmp)
# Mean emotion score per input element and emotion type.
agg <- aggregate(emotions$emotion, list(emotions$element_id,emotions$emotion_type),mean)
colnames(agg) <- c("doc_id","emotion","mean")
# element_id is matched to the row index of out$meta to attach covariates.
out$meta$id <- seq_len(nrow(out$meta))
covars <- out$meta[,c("id","date","channel","txt")]
agg <- merge(agg, covars, by.x="doc_id", by.y="id")
agg$date <- as.Date(agg$date)
# Collapse every date to the first day of its month.
day(agg$date) <- 1
# Total (sum of document means) anger per month and channel.
is_anger <- agg$emotion == "anger"
anger <- aggregate(agg$mean[is_anger], list(agg$date[is_anger], agg$channel[is_anger]), sum)
colnames(anger) <- c("date","channel","mean")
ggplot(anger, aes(x=date, y=mean, color=channel))+geom_point()+stat_smooth()
anger$emotion <- "anger"
head(agg)
head(anger)
# Exploratory plots of monthly anger per channel.
# A statement that added scale_color_manual() to the result of aggregate()
# used to sit here. A ggplot scale cannot be added to a data frame, so that
# assignment never succeeded; it is removed, and `anger` keeps the columns
# (date, channel, mean) that the plots below rely on.

# Points + smoother per channel with the campaign / inauguration markers;
# label_y is the height at which the three period labels are drawn.
plot_anger_periods <- function(label_y) {
  ggplot(anger, aes(x=date, y=mean, color=channel)) +
    geom_point() +
    stat_smooth(se=FALSE) +
    scale_color_manual(values=c("magenta","firebrick3","royalblue3")) +
    geom_vline(xintercept=as.Date("2015-06-16"), linetype="dashed") +
    geom_vline(xintercept=as.Date("2017-01-20"), linetype="dashed") +
    geom_label(x=as.Date("2014-08-01"), y=label_y, label="Pre-Campaign",
               size=3, show.legend=FALSE, color="black") +
    geom_label(x=as.Date("2016-05-01"), y=label_y, label="Campaign",
               size=3, show.legend=FALSE, color="black") +
    geom_label(x=as.Date("2018-06-01"), y=label_y, label="Post-Inauguration",
               size=3, show.legend=FALSE, color="black") +
    theme(legend.position="bottom", legend.title=element_blank(),
          axis.title=element_text(size=8))
}

# Plain version with the default smoother (confidence band shown).
ggplot(anger, aes(x=date, y=mean, color=channel))+geom_point()+stat_smooth()+scale_color_manual(values=c("magenta","firebrick3","royalblue3"))
# Same data with period markers; labels at y=1500, then at y=25, then with the
# y axis limited to [0, 25].
plot_anger_periods(1500)
plot_anger_periods(25)
plot_anger_periods(25) + ylim(c(0,25))
# Period code on the month-start dates: 1 before 2015-06-16, 2 from
# 2015-06-16, 3 from 2017-01-20 (later assignments overwrite earlier ones).
agg$prepost <- 1
agg$prepost[agg$date>=as.Date("2015-06-16")]<- 2
agg$prepost[agg$date>=as.Date("2017-01-20")]<- 3
########## FIGURE A1 ##########
# Mean (not sum) of document-level anger per month, channel and period.
anger <- aggregate(agg$mean[agg$emotion=="anger"], list(agg$date[agg$emotion=="anger"],agg$channel[agg$emotion=="anger"], agg$prepost[agg$emotion=="anger"]), mean)
colnames(anger) <- c("date","channel","prepost","mean")
# First attempt: grouped by period only, with labels/limits at 25.
ggplot(anger, aes(x=date, y=mean, color=channel, group=prepost))+geom_point()+stat_smooth(se=F)+scale_color_manual(values=c("magenta","firebrick3","royalblue3"))+geom_vline(xintercept=as.Date("2015-06-16"), linetype="dashed")+geom_vline(xintercept=as.Date("2017-01-20"), linetype="dashed")+geom_label(x=as.Date("2014-08-01"), y=25, label="Pre-Campaign", size=3,show.legend = FALSE, color="black")+geom_label(x=as.Date("2016-05-01"), y=25, label="Campaign", size=3,show.legend = FALSE, color="black")+geom_label(x=as.Date("2018-06-01"), y=25, label="Post-Inauguration", size=3,show.legend = FALSE, color="black")+theme(legend.position = "bottom",legend.title = element_blank(),axis.title=element_text(size=8))+ylim(c(0,25))
# The same aggregate is recomputed (identical to the one above) and printed.
anger <- aggregate(agg$mean[agg$emotion=="anger"], list(agg$date[agg$emotion=="anger"],agg$channel[agg$emotion=="anger"], agg$prepost[agg$emotion=="anger"]), mean)
colnames(anger) <- c("date","channel","prepost","mean")
anger
# Recomputed once more, unchanged.
anger <- aggregate(agg$mean[agg$emotion=="anger"], list(agg$date[agg$emotion=="anger"],agg$channel[agg$emotion=="anger"], agg$prepost[agg$emotion=="anger"]), mean)
colnames(anger) <- c("date","channel","prepost","mean")
# Second attempt: labels and y limits moved to 0.03; still grouped by period
# only, so each smoother pools the channels.
ggplot(anger, aes(x=date, y=mean, color=channel, group=prepost))+geom_point()+stat_smooth(se=F)+scale_color_manual(values=c("magenta","firebrick3","royalblue3"))+geom_vline(xintercept=as.Date("2015-06-16"), linetype="dashed")+geom_vline(xintercept=as.Date("2017-01-20"), linetype="dashed")+geom_label(x=as.Date("2014-08-01"), y=0.03, label="Pre-Campaign", size=3,show.legend = FALSE, color="black")+geom_label(x=as.Date("2016-05-01"), y=0.03, label="Campaign", size=3,show.legend = FALSE, color="black")+geom_label(x=as.Date("2018-06-01"), y=0.03, label="Post-Inauguration", size=3,show.legend = FALSE, color="black")+theme(legend.position = "bottom",legend.title = element_blank(),axis.title=element_text(size=8))+ylim(c(0,0.03))
# Third attempt: grouped by period x channel, giving one smoother per channel
# within each period.
ggplot(anger, aes(x=date, y=mean, color=channel, group=paste0(prepost, channel)))+geom_point()+stat_smooth(se=F)+scale_color_manual(values=c("magenta","firebrick3","royalblue3"))+geom_vline(xintercept=as.Date("2015-06-16"), linetype="dashed")+geom_vline(xintercept=as.Date("2017-01-20"), linetype="dashed")+geom_label(x=as.Date("2014-08-01"), y=0.03, label="Pre-Campaign", size=3,show.legend = FALSE, color="black")+geom_label(x=as.Date("2016-05-01"), y=0.03, label="Campaign", size=3,show.legend = FALSE, color="black")+geom_label(x=as.Date("2018-06-01"), y=0.03, label="Post-Inauguration", size=3,show.legend = FALSE, color="black")+theme(legend.position = "bottom",legend.title = element_blank(),axis.title=element_text(size=8))+ylim(c(0,0.03))
# Drop the msnbc rows for month 2014-12 and redraw the anger-by-period plot.
# Several trial statements that could not run were removed from this spot:
# a comparison against an undefined object `msnbc`, a row subset without the
# trailing comma, an attempt to assign NULL to rows, and `!which(...)`.

# Earliest month with msnbc data, printed for inspection.
min(agg$date[agg$channel=="msnbc"])
# Rows about to be dropped, printed for inspection.
agg[which(agg$date==as.Date("2014-12-01")&agg$channel=="msnbc"),]
# Logical mask instead of agg[-which(...), ]: negative indexing with an empty
# which() result would drop every row. `%in% TRUE` treats NA comparisons as
# "not selected", matching what which() does.
is_msnbc_2014_12 <- (agg$date==as.Date("2014-12-01") & agg$channel=="msnbc") %in% TRUE
agg <- agg[!is_msnbc_2014_12, ]
########## FIGURE A1 ##########
# Mean document-level anger per month, channel and period.
anger <- aggregate(agg$mean[agg$emotion=="anger"], list(agg$date[agg$emotion=="anger"],agg$channel[agg$emotion=="anger"], agg$prepost[agg$emotion=="anger"]), mean)
colnames(anger) <- c("date","channel","prepost","mean")
# One smoother per period x channel; period labels and y limits at 0.03.
ggplot(anger, aes(x=date, y=mean, color=channel, group=paste0(prepost, channel))) +
  geom_point() +
  stat_smooth(se=FALSE) +
  scale_color_manual(values=c("magenta","firebrick3","royalblue3")) +
  geom_vline(xintercept=as.Date("2015-06-16"), linetype="dashed") +
  geom_vline(xintercept=as.Date("2017-01-20"), linetype="dashed") +
  geom_label(x=as.Date("2014-08-01"), y=0.03, label="Pre-Campaign",
             size=3, show.legend=FALSE, color="black") +
  geom_label(x=as.Date("2016-05-01"), y=0.03, label="Campaign",
             size=3, show.legend=FALSE, color="black") +
  geom_label(x=as.Date("2018-06-01"), y=0.03, label="Post-Inauguration",
             size=3, show.legend=FALSE, color="black") +
  theme(legend.position="bottom", legend.title=element_blank(),
        axis.title=element_text(size=8)) +
  ylim(c(0,0.03))
agg <- aggregate(emotions$emotion, list(emotions$element_id,emotions$emotion_type),mean)
colnames(agg) <- c("doc_id","emotion","mean")
out$meta$id <- 1:nrow(out$meta)
covars <- out$meta[,c("id","date","channel","txt")]
agg <- merge(agg, covars, by.x="doc_id", by.y="id")
# Convert to month: coerce to Date, then move every date to the first day of
# its month so that later aggregation by `date` is effectively by month.
# `day<-` is presumably lubridate's replacement function, loaded earlier in the
# script.
agg$date <- local({
  month_start <- as.Date(agg$date)
  day(month_start) <- 1
  month_start
})
######## FIGURE A1 ########
#' Sum one emotion's scores within each date x channel cell.
#'
#' @param scores Data frame with `emotion`, `mean`, `date` and `channel`
#'   columns (here: `agg`).
#' @param emotion_label Value of `scores$emotion` to keep; also written to the
#'   `emotion` column of the result.
#' @return Data frame with columns `date`, `channel`, `mean` and `emotion`.
#'   The `mean` column keeps its original name but holds the SUM of the
#'   per-document scores in each cell.
sum_emotion_by_month <- function(scores, emotion_label) {
  keep <- scores$emotion == emotion_label
  totals <- aggregate(
    scores$mean[keep],
    list(scores$date[keep], scores$channel[keep]),
    sum
  )
  colnames(totals) <- c("date", "channel", "mean")
  totals$emotion <- emotion_label
  totals
}

# One table per negative emotion; the object names are kept as before in case
# later code refers to them.
anger <- sum_emotion_by_month(agg, "anger")
fear <- sum_emotion_by_month(agg, "fear")
sad <- sum_emotion_by_month(agg, "sadness")
disg <- sum_emotion_by_month(agg, "disgust")

# Stack in the original order (anger, fear, disgust, sadness).
neg <- rbind(anger, fear, disg, sad)
# Tag every row of `neg` with the period it falls into, then drop empty months.
neg$date <- as.Date(neg$date)

# 1/0 indicators: on or after 2017-01-20, and between 2015-06-16 and
# 2017-01-20 (both bounds inclusive).
neg$post_election <- ifelse(
  neg$date >= as.Date("2017-01-20"),
  yes = 1,
  no = 0
)
neg$post_trump <- ifelse(
  neg$date >= as.Date("2015-06-16") & neg$date <= as.Date("2017-01-20"),
  yes = 1,
  no = 0
)

# Default label first, then overwrite. "post-election" is assigned last, so it
# wins for a date of exactly 2017-01-20, where both indicators are 1.
neg$time <- "pre-election"
neg$time[which(neg$post_trump == 1)] <- "election"
neg$time[which(neg$post_election == 1)] <- "post-election"

# dropping null msnbc month: remove cells whose total is exactly zero
neg <- neg[!(neg$mean == 0), ]
# Total negative emotion over time by channel, one facet per emotion.
# Grouping on channel + time gives each channel a separate smoother within
# each period.
ggplot(
  neg,
  aes(x = date, y = mean, color = channel, group = paste(channel, time))
) +
  geom_point() +
  stat_smooth(se = FALSE) +
  scale_color_manual(values = c("magenta", "firebrick3", "royalblue3")) +
  xlab(NULL) +
  ylab("Total Emotion") +
  # Dashed lines separate the three labelled periods.
  geom_vline(xintercept = as.Date("2015-06-16"), linetype = "dashed") +
  geom_vline(xintercept = as.Date("2017-01-20"), linetype = "dashed") +
  geom_label(
    x = as.Date("2014-08-01"), y = 25, label = "Pre-Campaign",
    size = 3, show.legend = FALSE, color = "black"
  ) +
  geom_label(
    x = as.Date("2016-05-01"), y = 25, label = "Campaign",
    size = 3, show.legend = FALSE, color = "black"
  ) +
  geom_label(
    x = as.Date("2018-06-01"), y = 25, label = "Post-Inauguration",
    size = 3, show.legend = FALSE, color = "black"
  ) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.title = element_text(size = 8)
  ) +
  facet_wrap(~emotion)
